
###############################################
########## JEPS OPTIMAL PERSUASION ############
###############################################

## Optimal Persuasion under Confirmation Bias: Theory and Evidence from a Registered Report
## Journal of Experimental Political Science
## Author: Love Christensen
## website: lovechristensen.com
## Contact: love.christensen@gmail.com

################################################
############ REPLICATION MATERIAL ##############
################################################

require(estimatr)
require(tidyverse)
require(tikzDevice)
require(gridExtra)
require(reshape2)
require(ggpubr)
require(ggplot2)

##########################################################
###### FILE 4: POWER ANALYSIS AND TREATMENT EFFECTS ######
##########################################################

## Please beware the the power analysis and model selection exercise takes a couple of days to run. All files are saved to the ../output/figures folder.

## GRAPH ORDER

# Figure 1. "Confirmation Bias Affects the Persuasiveness of Extreme Messages"
# Appendix Figure 1. "An Illustration of Discontinuous Discounting"
# Appendix Figure 11. "Effect Size and Variances as a Function of Discounting Parameters"
# Appendix Figure 12. "Power Analysis"
# Appendix Figure 13. "Model Selection using BIC and AIC"

##################################################
############### VISUALIZATIONS ###################
##################################################

####################################################
##################### FIGURE 1 #####################
####################################################

## one shot persuasion: exponential discounting
bf2 <- function(muInit, sigInit, x, r){
  bf.dat <- data.frame(mu0 = NA, sig0 = NA, mu1 = NA, sig1 = NA, sigest = NA, x = NA)
  mu0 <- muInit
  sig0 <- sigInit
  
  # shock 
  sigest <- exp(r * abs(mu0 - x))
  
  # updating
  mu1 <- mu0 * ((1/sig0) / ((1/sig0) + (1/sigest))) + x * ((1/sigest) / ((1/sig0) + (1/sigest)))
  sig1 <- 1/(1/sig0 + 1/sigest)
  bf.dat[1, ] <- c(mu0, sig0, mu1, sig1, sigest, x)
  return(bf.dat)
}

dat2 <- data.frame(mu = NA, sig = NA, x = NA, sigx = NA)
j <- 1
for(i in seq(0, 5, .05)){
  temp <- bf2(0, 2, i, 1)
  dat2[j, ] <- temp[, c(3, 4, 6, 5)]
  j <- j + 1
}


#################################################
## one shot persuasion: linear discounting
#################################################

bf3 <- function(muInit, sigInit, x, b){
  bf.dat <- data.frame(mu0 = NA, sig0 = NA, mu1 = NA, sig1 = NA, sigest = NA, x = NA)
  mu0 <- muInit
  sig0 <- sigInit
  
  # shock 
  sigest <- 1 + b * abs(muInit - x)
  
  # updating
  mu1 <- mu0 * ((1/sig0) / ((1/sig0) + (1/sigest))) + x * ((1/sigest) / ((1/sig0) + (1/sigest)))
  sig1 <- 1/(1/sig0 + 1/sigest)
  bf.dat[1, ] <- c(mu0, sig0, mu1, sig1, sigest, x)
  return(bf.dat)
}

dat3 <- data.frame(mu = NA, sig = NA, x = NA, sigx = NA)
j <- 1
for(i in seq(0, 5, .05)){
  temp <- bf3(muInit = 0, sigInit = 2, x = i, b = 1)
  dat3[j, ] <- temp[, c(3, 4, 6, 5)]
  j <- j + 1
}


######################################
## one shot persuasion: no discounting
######################################


bf5 <- function(muInit, sigInit, x, r){
  bf.dat <- data.frame(mu0 = NA, sig0 = NA, mu1 = NA, sig1 = NA, sigest = NA, x = NA)
  mu0 <- muInit
  sig0 <- sigInit
  
  # shock 
  sigest <- sig1
  
  # updating
  mu1 <- mu0 * ((1/sig0) / ((1/sig0) + (1/sigest))) + x * ((1/sigest) / ((1/sig0) + (1/sigest)))
  sig1 <- 1/(1/sig0 + 1/sigest)
  bf.dat[1, ] <- c(mu0, sig0, mu1, sig1, sigest, x)
  return(bf.dat)
}


dat5 <- data.frame(mu = NA, sig = NA, x = NA, sigx = NA)
j <- 1
sig1 <- 2
for(i in seq(0, 5, .05)){
  temp <- bf5(0, 2, i, 1)
  dat5[j, ] <- temp[, c(3, 4, 6, 5)]
  j <- j + 1
}


plot_expdisc_mu <- ggplot(data = dat2, aes(x = x, y = mu)) +
  geom_line() +
  theme_minimal() +
  xlab("\n Prediction distance ($x - \\hat\\mu_0$)") +
  ylab("Updating ($\\hat\\mu_1 - \\hat\\mu_0$) \n") +
  ggtitle("Exponential discounting \n") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,2.5))

plot_expdisc_sigx <- ggplot(data = dat2, aes(x = x, y = sigx)) +
  geom_line() +
  theme_minimal() +
  xlab("\n Prediction distance ($x - \\hat\\mu_0$)") +
  ylab("\n Uncertainty ($\\sigma^2_x$) \n") +
  ggtitle("\n $\\sigma^2_x = e^{r(x - \\hat\\mu_0)}$ \n") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,10))

plot_lindisc_mu <- ggplot(data = dat3, aes(x = x, y = mu)) +
  geom_line() +
  theme_minimal() +
  xlab("\n Prediction distance ($x - \\hat\\mu_0$)") +
  ylab("Updating ($\\hat\\mu_1 - \\hat\\mu_0$) \n") +
  ggtitle("Linear discounting \n") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,2.5))

plot_lindisc_sigx <- ggplot(data = dat3, aes(x = x, y = sigx)) +
  geom_line() +
  theme_minimal() +
  xlab("\n Prediction distance ($x - \\hat\\mu_0$)") +
  ylab("\n Uncertainty ($\\sigma^2_x$) \n") +
  ggtitle("\n $\\sigma^2_x = a + b(x - \\hat\\mu_0)$ \n") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,10))

plot_nodisc_mu <- ggplot(data = dat5, aes(x = x, y = mu)) +
  geom_line() +
  theme_minimal() +
  xlab("\n Prediction distance ($x - \\hat\\mu_0$)") +
  ylab("Updating ($\\hat\\mu_1 - \\hat\\mu_0$) \n") +
  ggtitle("No discounting \n") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,2.5))

plot_nodisc_sigx <- ggplot(data = dat5, aes(x = x, y = sigx)) +
  geom_line() +
  theme_minimal() +
  xlab("\n Prediction distance ($x - \\hat\\mu_0$)") +
  ylab("\n Uncertainty ($\\sigma^2_x$) \n") +
  ggtitle("\n $\\sigma^2_x = s^2$ \n") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,10))


tikz(file = "../output/figures/fig_disc_ex.tex", width = 9, height = 5)
grid.arrange(plot_nodisc_mu,
             plot_lindisc_mu, 
             plot_expdisc_mu,
             plot_nodisc_sigx,
             plot_lindisc_sigx,
             plot_expdisc_sigx,
             nrow = 2)
dev.off()


###################################################
################ APPENDIX FIGURE 1 ################
###################################################


disc_disc <- function(muInit, sigInit, sig_prime, x, x_tau){
  bf.dat <- data.frame(mu0 = NA, sig0 = NA, mu1 = NA, sig1 = NA, sigest = NA, x = NA)
  mu0 <- muInit
  sig0 <- sigInit
  
  # shock 
  sigest <- ifelse(x <= x_tau, sig0, sig_prime)
  
  # updating
  mu1 <- mu0 * ((1/sig0) / ((1/sig0) + (1/sigest))) + x * ((1/sigest) / ((1/sig0) + (1/sigest)))
  sig1 <- 1/(1/sig0 + 1/sig_prime)
  bf.dat[1, ] <- c(mu0, sig0, mu1, sig1, sigest, x)
  return(bf.dat)
}



dat_disc <- data.frame(mu = NA, sig = NA, x = NA, sigx = NA)
j <- 1
for(i in seq(0, 5, .01)){
  temp <- disc_disc(0, 2, 7.5, i, 2.5)
  dat_disc[j, ] <- temp[, c(3, 4, 6, 5)]
  j <- j + 1
}

glimpse(dat_disc)
dat_disc <- dat_disc %>%
  mutate(above_threshold = ifelse(x > 2.5, 1, 0))


plot_discdisc_mu <- ggplot(data = subset(dat_disc, above_threshold == 0), 
                           aes(x = x, y = mu)) +
  geom_line() +
  geom_line(data = subset(dat_disc, above_threshold == 1), 
            aes(x = x, y = mu)) +
  theme_minimal() +
  xlab("$x - \\hat\\mu_0$") +
  ylab("\n$\\hat\\mu_1$") +
  ggtitle("") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,2.5),
                  xlim = c(0, 5))

plot_discdisc_sigx <- ggplot(data = subset(dat_disc, above_threshold == 0),
                             aes(x = x, y = sigx)) +
  geom_line() +
  geom_line(data = subset(dat_disc, above_threshold == 1), 
            aes(x = x, y = sigx)) +
  theme_minimal() +
  xlab("$x - \\hat\\mu_0$") +
  ylab("\n $\\sigma^2_x$") +
  ggtitle("") +
  theme(aspect.ratio=1,
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim=c(0,10))

tikz(file = "../output/figures/fig_discdisc_ex.tex", width = 6, height = 3)
grid.arrange(plot_discdisc_mu,
             plot_discdisc_sigx,
             nrow = 1)
dev.off()


###################################################
################# APPENDIX FIGURE 11 ##############
###################################################


### NO DISCOUNTING ###
mfx_nd <- function(s2_x, sig2_0){ 
  mfx_dat <- data.frame("x1" = NA, "x10" = NA, "x20" = NA, "sig2_x" = NA)
  mfx_dat$x1 <- (1/s2_x) / ((1/sig2_0) + (1/s2_x))
  mfx_dat$x10 <- (1/s2_x) / ((1/sig2_0) + (1/s2_x))
  mfx_dat$x20 <- (1/s2_x) / ((1/sig2_0) + (1/s2_x))
  mfx_dat$sig2_x <- s2_x
  return(mfx_dat)
}

dat_mfx_nd <- data.frame(mfx_x1 = NA, mfx_x10 = NA, mfx_x20 = NA, sig2_x = NA)
j <- 1
for(i in seq(1, 50, 25)){
  temp <- mfx_nd(i, 1)
  dat_mfx_nd[j, ] <- temp
  j <- j + 1
}

ggplot(dat_mfx_nd, aes(sig2_x, mfx_x1)) +
  geom_line() +
  ylim(0, 1)

### linear discounting ###

mfx_ld <- function(s2_x, sig2_0){ 
  mfx_dat <- data.frame("x1" = NA, "x10" = NA, "x20" = NA, "sig2_x" = NA)
  mfx_dat$x1 <- ((1/(1 + 1 * s2_x)) / ((1/sig2_0) + (1/(1 + 1 * s2_x)))) / sqrt(sig2_0)
  mfx_dat$x10 <- ((1/(1 + 10 * s2_x)) / ((1/sig2_0) + (1/(1 + 10 * s2_x))))  / sqrt(sig2_0)
  mfx_dat$x20 <- ((1/(1 + 20 * s2_x)) / ((1/sig2_0) + (1/(1 + 20 * s2_x)))) / sqrt(sig2_0)
  mfx_dat$sig2_x <- s2_x
  return(mfx_dat)
}

dat_mfx_ld <- data.frame(mfx_x1 = NA, mfx_x10 = NA, mfx_x20 = NA, sig2_x = NA)
j <- 1
for(i in seq(1, 25, 1)){
  temp <- mfx_ld(i, 25)
  dat_mfx_ld[j, ] <- temp
  j <- j + 1
}

dat_mfx_ld <- melt(dat_mfx_ld, id.vars = "sig2_x")

ggplot(dat_mfx_ld, aes(sig2_x, value, color = variable)) +
  geom_line()

### exponential discounting ###

mfx_ed <- function(s2_x, sig2_0){ 
  mfx_dat <- data.frame("x1" = NA, "x10" = NA, "x20" = NA, "sig2_x" = NA)
  mfx_dat$x1 <- (1/exp(s2_x * 1)) / ((1/sig2_0) + (1/exp(s2_x * 1))) / sqrt(sig2_0)
  mfx_dat$x10 <- (1/exp(s2_x * 10)) / ((1/sig2_0) + (1/exp(s2_x * 10))) / sqrt(sig2_0)
  mfx_dat$x20 <- (1/exp(s2_x * 20)) / ((1/sig2_0) + (1/exp(s2_x * 20))) / sqrt(sig2_0)
  mfx_dat$sig2_x <- s2_x
  return(mfx_dat)
}

dat_mfx_ed <- data.frame(mfx_x1 = NA, mfx_x10 = NA, mfx_x20 = NA, sig2_x = NA)
j <- 1
for(i in seq(0.1, 1, 0.01)){
  temp <- mfx_ed(i, 25)
  dat_mfx_ed[j, ] <- temp
  j <- j + 1
}

dat_mfx_ed <- melt(dat_mfx_ed, id.vars = "sig2_x")
ggplot(dat_mfx_ed, aes(sig2_x, value, color = variable)) +
  geom_line()

## linear discounting

fxsz_ld <- function(a, b, sig2_0, x){ 
  fxsz_dat <- data.frame("mu1" = 0, "mu_sig2" = NA, "x" = x, "a" = a, "b" = b, "sig2_0" = sig2_0,  "x_sig2" = NA)
  fxsz_dat$mu1 <- (x/(a + b * x)) / ((1/sig2_0) + (1/(a + b * x)))
  fxsz_dat$mu_sig2 <- fxsz_dat$mu1 / sqrt(fxsz_dat$sig2_0)
  fxsz_dat$x_sig2 <- fxsz_dat$x / sqrt(fxsz_dat$sig2_0)
  return(fxsz_dat)
}

plot(seq(1:20), fxsz_ld(1, 5, 25, seq(1:20))[, 1])
plot(seq(1:20), fxsz_ld(1, 5, 25, seq(1:20))[, 2])
plot(seq(1:20), fxsz_ld(1, 2.5, 25, seq(1:20))[, 1])
plot(seq(1:20), fxsz_ld(1, 2.5, 25, seq(1:20))[, 2])

k <- 1
fxsz_ld_dat <- data.frame("mu1" = NA, "mu_sig2" = NA, "x" = NA, "x_sig2" = NA, "b" = NA)
for(b in seq(0, 10, 1)){
  fxsz_ld_dat[k:(k - 1 + length(seq(0.1, 20, 0.1))), ] <- fxsz_ld(1, b, 25, seq(0.1, 20, 0.1))[, c(1, 2, 3, 7, 5)]
  k <- k + length(seq(0.1, 20, 0.1))
}
fxsz_ld_dat$b_sig2 <- fxsz_ld_dat$b / sqrt(25)

plot_ez_ld <- ggplot(fxsz_ld_dat, aes(x_sig2, mu_sig2, colour = factor(b_sig2))) +
  geom_line() +
  theme_minimal() +
  labs(color='$b/\\sigma_0$') +
  xlab("$x/\\sigma_0$") +
  ylab("$\\mu_1/\\sigma_0$") +
  ggtitle("$g(x) = 1 + b \\cdot x$") +
  theme(plot.title = element_text(hjust = 0.5),
        text = element_text(size = 13))


## exponential discounting

fxsz_ed <- function(r, sig2_0, x){ 
  fxsz_dat <- data.frame("mu1" = 0, "mu_sig2" = NA, "x" = x, "r" = r, "sig2_0" = sig2_0, "x_sig2" = NA)
  fxsz_dat$mu1 <- (x/exp(r * x)) / ((1/sig2_0) + (1/exp(r * x)))
  fxsz_dat$mu_sig2 <- fxsz_dat$mu1 / sqrt(fxsz_dat$sig2_0)
  fxsz_dat$x_sig2 <- fxsz_dat$x / sqrt(fxsz_dat$sig2_0)
  return(fxsz_dat)
}

plot(seq(1:20), fxsz_ed(1, 25, seq(1:20))[, 1], type = "l")
plot(seq(1:20), fxsz_ed(1, 25, seq(1:20))[, 2], type = "l")
plot(seq(1:20), fxsz_ed(0.5, 25, seq(1:20))[, 1], type = "l")
plot(seq(1:20), fxsz_ed(0.5, 25, seq(1:20))[, 2], type = "l")

k <- 1
fxsz_ed_dat <- data.frame("mu1" = NA, "mu_sig2" = NA, "x" = NA, "x_sig2" = NA, "r" = NA)
for(r in seq(0, 1, 0.1)){
  fxsz_ed_dat[k:(k - 1 + length(seq(0.1, 20, 0.1))), ] <- fxsz_ed(r, 25, seq(0.1, 20, 0.1))[, c(1, 2, 3, 6, 4)]
  k <- k + length(seq(0.1, 20, 0.1))
}

plot_ez_ed <- ggplot(fxsz_ed_dat, aes(x_sig2, mu_sig2, colour = factor(r))) +
  geom_line() +
  theme_minimal() +
  labs(color='$r$') +
  xlab("$x/\\sigma_0$") +
  ylab("$\\mu_1/\\sigma_0$") +
  ggtitle("$g(x) = exp(r \\cdot x$)") +
  theme(plot.title = element_text(hjust = 0.5),
        text = element_text(size = 13))

##### VARIANCES ######

## linear discounting 

fxsz_ld_s2x <- function(a, b, x){ 
  fxsz_dat <- data.frame("x" = x, "a" = a, "b" = b, "s2_x" = NA)
  fxsz_dat$s2_x <- a + b * x
  return(fxsz_dat)
}

k <- 1
fxsz_ld_s2x_dat <- data.frame("x" = NA, "s2_x" = NA, "b" = NA)
for(b in seq(0, 10, 1)){
  fxsz_ld_s2x_dat[k:(k - 1 + length(seq(0.1, 20, 0.1))), ] <- fxsz_ld_s2x(1, b, seq(0.1, 20, 0.1))[, c(1, 4, 3)]
  k <- k + length(seq(0.1, 20, 0.1))
}
fxsz_ld_s2x_dat$s2x_s20 <- fxsz_ld_s2x_dat$s2_x/25
fxsz_ld_s2x_dat$x_s20 <- fxsz_ld_s2x_dat$x/sqrt(25)
fxsz_ld_s2x_dat$b_sig2 <- fxsz_ld_s2x_dat$b/sqrt(25)

plot_ez_ld_s2x <- ggplot(fxsz_ld_s2x_dat, aes(x_s20, s2x_s20, colour = factor(b_sig2))) +
  geom_line() +
  theme_minimal() +
  labs(color='$b/\\sigma_0$') +
  xlab("$x/\\sigma_0$") +
  ylab("$\\sigma^2_x/\\sigma^2_0$") +
  ggtitle("") +
  ylim(0, 10) +
  theme(text = element_text(size = 13))

## exponential discounting


fxsz_ed_s2x <- function(r, x){ 
  fxsz_dat <- data.frame("x" = x, "r" = r, "s2_x" = NA)
  fxsz_dat$s2_x <- exp(r * x)
  return(fxsz_dat)
}

k <- 1
fxsz_ed_s2x_dat <- data.frame("x" = NA, "s2_x" = NA, "r" = NA)
for(r in seq(0, 1, 0.1)){
  fxsz_ed_s2x_dat[k:(k - 1 + length(seq(0.1, 20, 0.1))), ] <- fxsz_ed_s2x(r, seq(0.1, 20, 0.1))[, c(1, 3, 2)]
  k <- k + length(seq(0.1, 20, 0.1))
}
fxsz_ed_s2x_dat$s2x_s20 <- fxsz_ed_s2x_dat$s2_x/25
fxsz_ed_s2x_dat$x_s20 <- fxsz_ed_s2x_dat$x/sqrt(25)

plot_ez_ed_s2x <- ggplot(fxsz_ed_s2x_dat, aes(x_s20, s2x_s20, colour = factor(r))) +
  geom_line() +
  theme_minimal() +
  labs(color='$r$') +
  xlab("$x/\\sigma_0$") +
  ylab("$\\sigma^2_x/\\sigma^2_0$") +
  ggtitle("") +
  coord_cartesian(ylim=c(0,10)) +
  theme(text = element_text(size = 13))


tikz(file = "../output/figures/fig_effect_sizes_variance.tex", width = 7.5, height = 7.5)
grid.arrange(plot_ez_ld,
             plot_ez_ed,
             plot_ez_ld_s2x,
             plot_ez_ed_s2x,
             nrow = 2)
dev.off()


####################################################
################### POWER ANALYSIS #################
####################################################

####################################################
################# APPENDIX FIGURE 12 ###############
####################################################

########################################################
################## NO DISCOUNTING ######################
########################################################


no_discounting <- function(mu0, sig2_0, sig2_x, x){
  bf.dat <- data.frame(mu0 = NA, sig2_0 = NA, mu1 = NA, sig2_1 = NA, sig2_x = NA, x = NA)
  
  # updating
  mu1 <- mu0 * ((1/sig2_0) / ((1/sig2_0) + (1/sig2_x))) + x * ((1/sig2_x) / ((1/sig2_0) + (1/sig2_x)))
  sig2_1 <- 1/(1/sig2_0 + 1/sig2_x)
  bf.dat[1, ] <- c(mu0, sig2_0, mu1, sig2_1, sig2_x, x)
  return(bf.dat)
}

sig.seq <- seq(70, 450, 20)
observation.seq <- c(3000, 4000, 5000)
dat_power <- data.frame("observations" = rep(observation.seq, each = length(sig.seq)), 
                        "sig.var" = rep(sig.seq, length(observation.seq)),
                        "significant" = 0, 
                        "attempts" = 0, 
                        "share" = NA, 
                        "ate_over_sig2_0" = 0)

for(sig.var in sig.seq){
  for(N0 in observation.seq){
    set.seed(1001)
    simulations <- 250
    
    k <- 1
    for(k in 1:simulations){
      observations <- N0
      
      dat_nd <- data.frame(mu = NA, sig = NA, x = NA)
      i <- 1
      for(i in 1:observations){
        x_seq <- seq(from = 0, to = 20, length.out = 21)
        temp <- no_discounting(mu0 = 0, sig2_0 = 25, sig2_x = sig.var, x = x_seq[sample(21, 1, replace = TRUE)])
        temp$mu1 <- temp$mu1 + rnorm(1, 0, sqrt(temp$sig2_0))
        dat_nd[i, ] <- temp[, c(3, 4, 6)]
        i <- i + 1
      }
      mod_nd1 <- lm_robust(mu ~ x, 
                           data = dat_nd,
                           se = "stata")
      
      
      dat_power[dat_power$sig.var == sig.var& dat_power$observations == observations, "significant"] <- dat_power[dat_power$sig.var == sig.var & dat_power$observations == observations, "significant"] + 
        ifelse(summary(mod_nd1)$coef[2, 4] <= 0.05, 1, 0)
      dat_power[dat_power$sig.var == sig.var & dat_power$observations == observations, "attempts"] <- k
      dat_power[dat_power$sig.var == sig.var & dat_power$observations == observations, "share"] <- dat_power[dat_power$sig.var == sig.var & dat_power$observations == observations, "significant"] / 
        dat_power[dat_power$sig.var == sig.var & dat_power$observations == observations, "attempts"]
      k <- k + 1
    }
    sig2_0 <- 25
    
    dat_power[dat_power$sig.var == sig.var, "ate_over_sig2_0"] <-  10 * ((1/sig.var) / ((1/sig2_0) + (1/sig.var))) / sqrt(sig2_0)
  }
  print(dat_power[dat_power$sig.var == sig.var, ])
}

dat_power_nd <- dat_power

#############################################################################
########################### LINEAR DISCOUNTING ##############################
#############################################################################


linear_discounting <- function(mu0, sig2_0, b, x){
  bf.dat <- data.frame(mu0 = NA, sig2_0 = NA, mu1 = NA, x = NA)
  
  # variance of prediction
  # s2_x = b
  sig2_x <- 1 + b * abs(mu0 - x)
  
  # updating
  mu1 <- mu0 * ((1/sig2_0) / ((1/sig2_0) + (1/sig2_x))) + x * ((1/sig2_x) / ((1/sig2_0) + (1/sig2_x)))
  sig1 <- 1/(1/sig2_0 + 1/sig2_x)
  bf.dat[1, ] <- c(mu0, sig2_0, mu1, x)
  return(bf.dat)
}


b.seq <- seq(from = 5, to = 20, by = 1)
observation.seq <- c(3000, 4000, 5000)
## building a data frame to summarize power analysis
dat_power <- data.frame("b" = rep(b.seq, length(observation.seq)),
                        "observations" = rep(observation.seq, each = length(b.seq)),
                        "expectedTreatmentOverSig0" = 0,
                        "expectedEffectOverSig0" = 0,
                        "significant.all_mod2" = 0, 
                        "attempts" = 0)

for(b0 in  b.seq){
  for(N0 in observation.seq){
    set.seed(1001)
    simulations <- 250
    k <- 1
    for(k in 1:simulations){
      observations <- N0
      
      dat_ld <- data.frame(mu = NA, sig = NA, x = NA)
      i <- 1
      for(i in 1:observations){
        s2_0 <- 25 # variance of sigma^2_0
        temp <- linear_discounting(mu0 = 0, sig2_0 = s2_0,  b = b0, round(runif(1, 0, 20))) # use LD function
        temp$mu1 <- temp$mu1 + rnorm(1, 0, sqrt(s2_0))
        dat_ld[i, ] <- temp[, c(3, 2, 4)]
        i <- i + 1 #change to i
      }

      mod_ld2 <- lm_robust(mu ~  I(x) + I(x^2), 
                           data = dat_ld,
                           se = "stata")
      
      ## add +1 to account for number of attempts
      dat_power[dat_power$b == b0, "attempts"] <- k
      
      ## summary statistics model 2
      # all coefficients
      dat_power[dat_power$b == b0 & dat_power$observations == observations, "significant.all_mod2"] <- dat_power[dat_power$b == b0 & dat_power$observations == observations, "significant.all_mod2"] +
        ifelse(all(summary(mod_ld2)$coef[2:3, 4] <= 0.05), 1, 0)
      
      dat_power[dat_power$b == b0 & dat_power$observations == observations, "share.all_mod2"] <- dat_power[dat_power$b == b0 & dat_power$observations == observations, "significant.all_mod2"] / 
        dat_power[dat_power$b == b0 & dat_power$observations == observations, "attempts"]
      
      k <- k + 1
    }
    # the calculations of treatment effects used to be here but I believe that was unnecessary
    dat_power[dat_power$b == b0, "expectedTreatmentOverSig0"] <- linear_discounting(0, 25, b0, 10)[3]/5
    dat_power[dat_power$b == b0, "expectedEffectOverSig0"] <- Reduce("+", sapply(0:20, function(x) linear_discounting(0, 25, b0, x)[3]))/ (21 * 5)
  }
  print(dat_power[dat_power$b == b0, ])
}

dat_power_ld <- dat_power


################################################################
################# EXPONENTIAL DISCOUNTING ######################
################################################################

exponential_discounting <- function(mu0, sig2_0, r, x){
  bf.dat <- data.frame(mu0 = NA, sig2_0 = NA, mu1 = NA, x = NA)
  
  # variance of prediction
  sig2_x <- exp(r * x)
  
  # updating
  mu1 <- mu0 * ((1/sig2_0) / ((1/sig2_0) + (1/sig2_x))) + x * ((1/sig2_x) / ((1/sig2_0) + (1/sig2_x)))
  sig1 <- 1/(1/sig2_0 + 1/sig2_x)
  bf.dat[1, ] <- c(mu0, sig2_0, mu1, x)
  return(bf.dat)
}

r.seq <- seq(from = 0.3, to = 0.8, by = 0.05)
observation.seq <- c(3000, 4000, 5000)
## building a data frame to summarize power analysis
dat_power <- data.frame("r" = rep(r.seq, length(observation.seq)),
                        "observations" = rep(observation.seq, each = length(r.seq)),
                        "expectedTreatmentOverSig0" = 0,
                        "expectedEffectOverSig0" = 0,
                        "significant.all_mod1" = 0,
                        "attempts" = 0, 
                        "share.all_mod1" = NA)
for(r0 in  r.seq){
  for(N0 in observation.seq){
    set.seed(1001)
    simulations <- 250
    k <- 1
    for(k in 1:simulations){
      observations <- N0
      
      dat_ld <- data.frame(mu = NA, sig = NA, x = NA)
      i <- 1
      for(i in 1:observations){
        s2_0 <- 25 # variance of sigma^2_0
        temp <- exponential_discounting(mu0 = 0, sig2_0 = s2_0,  r = r0, round(runif(1, 0, 20))) # use ED function
        temp$mu1 <- temp$mu1 + rnorm(1, 0, sqrt(s2_0))
        dat_ld[i, ] <- temp[, c(3, 2, 4)]
        i <- i + 1 #change to i
      }
      
      ## model with cubic regression
      mod_ld1 <- lm_robust(mu ~  I(x) + I(x^2) + I(x^3), 
                           data = dat_ld,
                           se = "stata")
      
      ## add +1 to account for number of attempts
      dat_power[dat_power$r == r0, "attempts"] <- k
      
      #############################################
      ######## SUMMARY STATISTICS MODEL 2 #########
      #############################################
      
      ############### ALL COEFFICIENTS ############
      
      dat_power[dat_power$r == r0 & dat_power$observations == observations, "significant.all_mod1"] <- dat_power[dat_power$r == r0 & dat_power$observations == observations, "significant.all_mod1"] + 
        ifelse(all(summary(mod_ld1)$coef[2:4, 4] <= 0.05), 1, 0)
      
      dat_power[dat_power$r == r0 & dat_power$observations == observations, "share.all_mod1"] <- dat_power[dat_power$r == r0 & dat_power$observations == observations, "significant.all_mod1"] / 
        dat_power[dat_power$r == r0 & dat_power$observations == observations, "attempts"]
 
      k <- k + 1
    }
    dat_power[dat_power$r == r0, "expectedTreatmentOverSig0"] <- exponential_discounting(0, 25, r0, 10)[3]/5
    dat_power[dat_power$r == r0, "expectedEffectOverSig0"] <- Reduce("+", sapply(0:20, function(x) exponential_discounting(0, 25, r0, x)[3]))/ (21 * 5)
  }
  print(dat_power[dat_power$r == r0, ])
}

dat_power_ed <- dat_power

#############################################################
###################### PLOTTING POWER #######################
#############################################################

plot_power_nd_linear <- ggplot(dat_power_nd, aes(ate_over_sig2_0, share)) +
  geom_point(aes(colour = factor(observations))) +
  geom_line(aes(colour = factor(observations))) +
  geom_hline(yintercept = .8,
             linetype = "dashed") +
  ylim(0, 1) +
  theme_minimal() +
  scale_x_continuous(limits = c(.1, 0.7), breaks = seq(from = 0.1, to = 0.7, by = 0.1)) +
  labs(colour = "Observations") +
  ggtitle("No Discounting: $p(x) = 1$") +
  xlab("Expected Effect / $\\sigma_0$") +
  ylab("Share Significant") +
  theme(plot.title = element_text(hjust = 0.5),
        aspect.ratio=1)


plot_power_ld_square <- ggplot(dat_power_ld, aes(expectedEffectOverSig0, share.all_mod2, colour = factor(observations))) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = .8,
             linetype = "dotted") +
  ylim(0, 1) +
  geom_line() +
  theme_minimal() +
  scale_x_continuous(limits = c(.1, .7), breaks = seq(from = 0.1, to = .7, by = 0.1)) +
  labs(colour = "Observations") +
  xlab("Expected Effect / $\\sigma_0$") +
  ylab("Share All Significant ($x$ and $x^2$)") +
  ggtitle("Linear Discounting: $p(x) = 2$") +
  theme(plot.title = element_text(hjust = 0.5),
        aspect.ratio=1)


plot_power_ed_cubic <- ggplot(dat_power_ed, aes(expectedEffectOverSig0, share.all_mod1, colour = factor(observations))) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = .8,
             linetype = "dashed") +
  ylim(0, 1) +
  geom_line() +
  theme_minimal() +
  scale_x_continuous(limits = c(.1, 0.7), breaks = seq(from = 0.1, to = 0.7, by = 0.1)) +
  labs(colour = "Observations") +
  xlab("Expected Effect / $\\sigma_0$") +
  ylab("Share All Significant ($x^1, x^2$ and $x^3$)") +
  ggtitle("Exponential Discounting: $p(x) = 3$") +
  theme(plot.title = element_text(hjust = 0.5),
        aspect.ratio=1)

tikz(file = "./output/figures/fig_power_analysis.tex", width = 5, height = 9.5)
ggarrange(plot_power_nd_linear,
          plot_power_ld_square,
          plot_power_ed_cubic,
          nrow = 3,
          common.legend = TRUE, 
          legend = "bottom")
dev.off()

####################################################
################# APPENDIX FIGURE 13 ###############
####################################################

#####################################################
################## LINEAR DISCOUNTING ###############
#####################################################

linear_discounting <- function(mu0, sig2_0, b, x){
  bf.dat <- data.frame(mu0 = NA, sig2_0 = NA, mu1 = NA, x = NA)
  
  # variance of prediction
  # s2_x = b
  sig2_x <- 1 + b * abs(mu0 - x)
  
  # updating
  mu1 <- mu0 * ((1/sig2_0) / ((1/sig2_0) + (1/sig2_x))) + x * ((1/sig2_x) / ((1/sig2_0) + (1/sig2_x)))
  sig1 <- 1/(1/sig2_0 + 1/sig2_x)
  bf.dat[1, ] <- c(mu0, sig2_0, mu1, x)
  return(bf.dat)
}


## building a data frame to summarize power analysis

b.seq <- seq(from = 2, to = 20, by = 1)
N0 <- 4000
dat_bic <- data.frame("b" = b.seq,
                      "observations" = N0,
                      "bic_mod1" = 0,
                      "bic_mod2" = 0,
                      "bic_mod3" = 0,
                      "bic_mod4" = 0,
                      "bic_mod5" = 0,
                      "aic_mod1" = 0,
                      "aic_mod2" = 0,
                      "aic_mod3" = 0,
                      "aic_mod4" = 0,
                      "aic_mod5" = 0)

for(b0 in b.seq){
  set.seed(1001)
  simulations <- 250
  k <- 1
  for(k in 1:simulations){
    observations <- N0
    
    dat_ld <- data.frame(mu = NA, sig = NA, x = NA)
    i <- 1 
    for(i in 1:observations){
      s2_0 <- 25 # variance of sigma^2_0
      temp <- linear_discounting(mu0 = 0, sig2_0 = s2_0,  b = b0, round(runif(1, 0, 20))) # use LD function
      temp$mu1 <- temp$mu1 + rnorm(1, 0, sqrt(s2_0))
      dat_ld[i, ] <- temp[, c(3, 2, 4)]
      i <- i + 1 
    }
    
    # linear model
    mod_ld1 <- lm(mu ~  x, 
                  data = dat_ld)
    
    ## model with sinh^-1 transformation
    mod_ld2 <- lm(mu ~  log(x + ((x^2 +1)^0.5)), 
                  data = dat_ld)
    
    ## model with qudratic term
    mod_ld3 <- lm(mu ~  I(x) + I(x^2), 
                  data = dat_ld)
    
    ## model with cubic term
    mod_ld4 <- lm(mu ~  I(x) + I(x^2) + I(x^3), 
                  data = dat_ld)
    
    ## model dummy variable regression
    mod_ld5 <- lm(mu ~  (x >= 5 & x < 10) + (x >= 10 & x < 15) + (x >= 15 & x < 20), 
                  data = dat_ld)
    
    dat_bic <- dat_bic %>% 
      mutate(bic_mod1 = ifelse(b == b0, bic_mod1 + BIC(mod_ld1), bic_mod1),
             bic_mod2 = ifelse(b == b0, bic_mod2 + BIC(mod_ld2), bic_mod2),
             bic_mod3 = ifelse(b == b0, bic_mod3 + BIC(mod_ld3), bic_mod3),
             bic_mod4 = ifelse(b == b0, bic_mod4 + BIC(mod_ld4), bic_mod4),
             bic_mod5 = ifelse(b == b0, bic_mod5 + BIC(mod_ld5), bic_mod5),
             aic_mod1 = ifelse(b == b0, aic_mod1 + AIC(mod_ld1), aic_mod1),
             aic_mod2 = ifelse(b == b0, aic_mod2 + AIC(mod_ld2), aic_mod2),
             aic_mod3 = ifelse(b == b0, aic_mod3 + AIC(mod_ld3), aic_mod3),
             aic_mod4 = ifelse(b == b0, aic_mod4 + AIC(mod_ld4), aic_mod4),
             aic_mod5 = ifelse(b == b0, aic_mod5 + AIC(mod_ld5), aic_mod5))
    k <- k + 1
  }
}
dat_bic <- dat_bic %>% 
  mutate(bic_mod1 = bic_mod1/simulations,
         bic_mod2 = bic_mod2/simulations,
         bic_mod3 = bic_mod3/simulations,
         bic_mod4 = bic_mod4/simulations,
         bic_mod5 = bic_mod5/simulations,
         aic_mod1 = aic_mod1/simulations,
         aic_mod2 = aic_mod2/simulations,
         aic_mod3 = aic_mod3/simulations,
         aic_mod4 = aic_mod4/simulations,
         aic_mod5 = aic_mod5/simulations)

dat_bic_ld <- dat_bic

dat_bic_ld <- dat_bic_ld %>%
  mutate(avgBIC = (bic_mod1 + bic_mod2 + bic_mod3 + bic_mod4 + bic_mod5)/5,
         avgAIC = (aic_mod1 + aic_mod2 + aic_mod3 + aic_mod4 + aic_mod5)/5)

dat_bic_ld <- dat_bic_ld %>%
  mutate(bic_mod1_avg = bic_mod1 - avgBIC,
         bic_mod2_avg = bic_mod2 - avgBIC,
         bic_mod3_avg = bic_mod3 - avgBIC,
         bic_mod4_avg = bic_mod4 - avgBIC,
         bic_mod5_avg = bic_mod5 - avgBIC,
         aic_mod1_avg = aic_mod1 - avgAIC,
         aic_mod2_avg = aic_mod2 - avgAIC,
         aic_mod3_avg = aic_mod3 - avgAIC,
         aic_mod4_avg = aic_mod4 - avgAIC,
         aic_mod5_avg = aic_mod5 - avgAIC)

dat_bic_ld_gg <- (reshape2::melt(reshape2::melt(dat_bic_ld, id.vars = "b"), id.vars = c("variable", "b")))
dat_bic_ld_gg <- dat_bic_ld_gg[, -3]

dat_aic_ld_gg <- dat_bic_ld_gg %>%
  filter(variable == "aic_mod1_avg" |
           variable == "aic_mod2_avg" |
           variable == "aic_mod3_avg" |
           variable == "aic_mod4_avg" |
           variable == "aic_mod5_avg")

dat_aic_ld_gg <- dat_aic_ld_gg %>%
  mutate(variable = recode(variable, "'aic_mod1_avg' = 'Linear';
                           'aic_mod2_avg' = 'ArcSinh(x)';
                           'aic_mod3_avg' = 'Quadratic';
                           'aic_mod4_avg' = 'Cubic';
                           'aic_mod5_avg' = 'Dummy'"))

dat_aic_ld_gg <- dat_aic_ld_gg %>%
  rename(model = variable)

dat_aic_ld_gg <- dat_aic_ld_gg %>%
  rowwise() %>%
  mutate(exp_eff = Reduce("+", sapply(0:20, function(x) linear_discounting(0, 25, b, x)[3]))/ (21 * 5))

dat_aic_ld_gg <- dat_aic_ld_gg %>% filter(model != "ArcSinh(x)")

dat_bic_ld_gg <- dat_bic_ld_gg %>%
  filter(variable == "bic_mod1_avg" |
           variable == "bic_mod2_avg" |
           variable == "bic_mod3_avg" |
           variable == "bic_mod4_avg" |
           variable == "bic_mod5_avg")


dat_bic_ld_gg <- dat_bic_ld_gg %>%
  mutate(variable = recode(variable, "'bic_mod1_avg' = 'Linear';
                           'bic_mod2_avg' = 'ArcSinh(x)';
                           'bic_mod3_avg' = 'Quadratic';
                           'bic_mod4_avg' = 'Cubic';
                           'bic_mod5_avg' = 'Dummy'"))


dat_bic_ld_gg <- dat_bic_ld_gg %>%
  rename(model = variable)

dat_bic_ld_gg <- dat_bic_ld_gg %>% filter(model != "ArcSinh(x)")

dat_bic_ld_gg <- dat_bic_ld_gg %>% 
  rowwise() %>%
  mutate(exp_eff = Reduce("+", sapply(0:20, function(x) linear_discounting(0, 25, b, x)[3]))/ (21 * 5))

###################################################
############ EXPONENTIAL DISCOUNTING ##############
###################################################

exponential_discounting <- function(mu0, sig2_0, r, x){
  bf.dat <- data.frame(mu0 = NA, sig2_0 = NA, mu1 = NA, x = NA)
  
  # variance of prediction
  # s2_x = b
  sig2_x <- exp(r * x)
  
  # updating
  mu1 <- mu0 * ((1/sig2_0) / ((1/sig2_0) + (1/sig2_x))) + x * ((1/sig2_x) / ((1/sig2_0) + (1/sig2_x)))
  sig1 <- 1/(1/sig2_0 + 1/sig2_x)
  bf.dat[1, ] <- c(mu0, sig2_0, mu1, x)
  return(bf.dat)
}


## building a data frame to summarize power analysis

r.seq <- seq(from = 0.15, to = 0.8, by = 0.05)
N0 <- 4000
dat_bic <- data.frame("r" = r.seq,
                      "observations" = N0,
                      "bic_mod1" = 0,
                      "bic_mod2" = 0,
                      "bic_mod3" = 0,
                      "bic_mod4" = 0,
                      "bic_mod5" = 0,
                      "aic_mod1" = 0,
                      "aic_mod2" = 0,
                      "aic_mod3" = 0,
                      "aic_mod4" = 0,
                      "aic_mod5" = 0)

for(r0 in r.seq){
  set.seed(1001)
  simulations <- 250
  k <- 1
  for(k in 1:simulations){
    observations <- N0
    
    dat_ed <- data.frame(mu = NA, sig = NA, x = NA)
    i <- 1 
    for(i in 1:observations){
      s2_0 <- 25 # variance of sigma^2_0
      temp <- exponential_discounting(mu0 = 0, sig2_0 = s2_0,  r = r0, round(runif(1, 0, 20))) # use LD function
      temp$mu1 <- temp$mu1 + rnorm(1, 0, sqrt(s2_0))
      dat_ed[i, ] <- temp[, c(3, 2, 4)]
      i <- i + 1 #change to i
    }
    
    # linear model
    mod_ed1 <- lm(mu ~  x, 
                  data = dat_ed)
    
    ## model with sinh^-1 transformation
    mod_ed2 <- lm(mu ~  log(x + ((x^2 +1)^0.5)), 
                  data = dat_ed)
    
    ## model with quadratic term
    mod_ed3 <- lm(mu ~  I(x) + I(x^2), 
                  data = dat_ed)
    
    ## model with cubic term
    mod_ed4 <- lm(mu ~  I(x) + I(x^2) + I(x^3), 
                  data = dat_ed)
    
    ## model dummy variable regression
    mod_ed5 <- lm(mu ~  (x >= 5 & x < 10) + (x >= 10 & x < 15) + (x >= 15 & x < 20), 
                  data = dat_ed)
    
    dat_bic <- dat_bic %>% 
      mutate(bic_mod1 = ifelse(r == r0, bic_mod1 + BIC(mod_ed1), bic_mod1),
             bic_mod2 = ifelse(r == r0, bic_mod2 + BIC(mod_ed2), bic_mod2),
             bic_mod3 = ifelse(r == r0, bic_mod3 + BIC(mod_ed3), bic_mod3),
             bic_mod4 = ifelse(r == r0, bic_mod4 + BIC(mod_ed4), bic_mod4),
             bic_mod5 = ifelse(r == r0, bic_mod5 + BIC(mod_ed5), bic_mod5),
             aic_mod1 = ifelse(r == r0, aic_mod1 + AIC(mod_ed1), aic_mod1),
             aic_mod2 = ifelse(r == r0, aic_mod2 + AIC(mod_ed2), aic_mod2),
             aic_mod3 = ifelse(r == r0, aic_mod3 + AIC(mod_ed3), aic_mod3),
             aic_mod4 = ifelse(r == r0, aic_mod4 + AIC(mod_ed4), aic_mod4),
             aic_mod5 = ifelse(r == r0, aic_mod5 + AIC(mod_ed5), aic_mod5))
    k <- k + 1
  }
}
dat_bic <- dat_bic %>% 
  mutate(bic_mod1 = bic_mod1/simulations,
         bic_mod2 = bic_mod2/simulations,
         bic_mod3 = bic_mod3/simulations,
         bic_mod4 = bic_mod4/simulations,
         bic_mod5 = bic_mod5/simulations,
         aic_mod1 = aic_mod1/simulations,
         aic_mod2 = aic_mod2/simulations,
         aic_mod3 = aic_mod3/simulations,
         aic_mod4 = aic_mod4/simulations,
         aic_mod5 = aic_mod5/simulations)

dat_bic_ed <- dat_bic

dat_bic_ed <- dat_bic_ed %>%
  mutate(avgBIC = (bic_mod1 + bic_mod2 + bic_mod3 + bic_mod4 + bic_mod5)/5,
         avgAIC = (aic_mod1 + aic_mod2 + aic_mod3 + aic_mod4 + aic_mod5)/5)

dat_bic_ed <- dat_bic_ed %>%
  mutate(bic_mod1_avg = bic_mod1 - avgBIC,
         bic_mod2_avg = bic_mod2 - avgBIC,
         bic_mod3_avg = bic_mod3 - avgBIC,
         bic_mod4_avg = bic_mod4 - avgBIC,
         bic_mod5_avg = bic_mod5 - avgBIC,
         aic_mod1_avg = aic_mod1 - avgAIC,
         aic_mod2_avg = aic_mod2 - avgAIC,
         aic_mod3_avg = aic_mod3 - avgAIC,
         aic_mod4_avg = aic_mod4 - avgAIC,
         aic_mod5_avg = aic_mod5 - avgAIC)


dat_bic_ed_gg <- (reshape2::melt(reshape2::melt(dat_bic_ed, id.vars = "r"), id.vars = c("variable", "r")))
dat_bic_ed_gg <- dat_bic_ed_gg[, -3]


dat_aic_ed_gg <- dat_bic_ed_gg %>%
  filter(variable == "aic_mod1_avg" |
           variable == "aic_mod2_avg" |
           variable == "aic_mod3_avg" |
           variable == "aic_mod4_avg" |
           variable == "aic_mod5_avg")

dat_aic_ed_gg <- dat_aic_ed_gg %>%
  mutate(variable = recode(variable, "'aic_mod1_avg' = 'Linear';
                           'aic_mod2_avg' = 'ArcSinh(x)';
                           'aic_mod3_avg' = 'Quadratic';
                           'aic_mod4_avg' = 'Cubic';
                           'aic_mod5_avg' = 'Dummy'"))

dat_aic_ed_gg <- dat_aic_ed_gg %>%
  rename(model = variable)

dat_aic_ed_gg <- dat_aic_ed_gg %>%
  rowwise() %>%
  mutate(exp_eff = Reduce("+", sapply(0:20, function(x) exponential_discounting(0, 25, r, x)[3]))/ (21 * 5))

dat_aic_ed_gg <- dat_aic_ed_gg %>% filter(model != "ArcSinh(x)")

dat_bic_ed_gg <- dat_bic_ed_gg %>%
  filter(variable == "bic_mod1_avg" |
           variable == "bic_mod2_avg" |
           variable == "bic_mod3_avg" |
           variable == "bic_mod4_avg" |
           variable == "bic_mod5_avg")


dat_bic_ed_gg <- dat_bic_ed_gg %>%
  mutate(variable = recode(variable, "'bic_mod1_avg' = 'Linear';
                           'bic_mod2_avg' = 'ArcSinh(x)';
                           'bic_mod3_avg' = 'Quadratic';
                           'bic_mod4_avg' = 'Cubic';
                           'bic_mod5_avg' = 'Dummy'"))


dat_bic_ed_gg <- dat_bic_ed_gg %>%
  rename(model = variable)

dat_bic_ed_gg <- dat_bic_ed_gg %>% filter(model != "ArcSinh(x)")

dat_bic_ed_gg <- dat_bic_ed_gg %>% 
  rowwise() %>%
  mutate(exp_eff = Reduce("+", sapply(0:20, function(x) exponential_discounting(0, 25, r, x)[3]))/ (21 * 5))


###################################################
################### NO DISCOUNTING ################
###################################################


no_discounting <- function(mu0, sig2_0, sig2_x, x){
  bf.dat <- data.frame(mu0 = NA, sig2_0 = NA, mu1 = NA, x = NA)
  
  
  # updating
  mu1 <- mu0 * ((1/sig2_0) / ((1/sig2_0) + (1/sig2_x))) + x * ((1/sig2_x) / ((1/sig2_0) + (1/sig2_x)))
  sig1 <- 1/(1/sig2_0 + 1/sig2_x)
  bf.dat[1, ] <- c(mu0, sig2_0, mu1, x)
  return(bf.dat)
}

## building a data frame to summarize power analysis

sig2_x.seq <- seq(50, 305, 15)
N0 <- 4000
dat_bic <- data.frame("sig2_x" = sig2_x.seq,
                      "observations" = N0,
                      "bic_mod1" = 0,
                      "bic_mod2" = 0,
                      "bic_mod3" = 0,
                      "bic_mod4" = 0,
                      "bic_mod5" = 0,
                      "aic_mod1" = 0,
                      "aic_mod2" = 0,
                      "aic_mod3" = 0,
                      "aic_mod4" = 0,
                      "aic_mod5" = 0)

for(sig2_x0 in sig2_x.seq){
  set.seed(1001)
  simulations <- 250
  k <- 1
  for(k in 1:simulations){
    observations <- N0
    
    dat_nd <- data.frame(mu = NA, sig = NA, x = NA)
    i <- 1 
    for(i in 1:observations){
      s2_0 <- 25 # variance of sigma^2_0
      temp <- no_discounting(mu0 = 0, sig2_0 = s2_0,  sig2_x = sig2_x0, round(runif(1, 0, 20))) # use LD function
      temp$mu1 <- temp$mu1 + rnorm(1, 0, sqrt(s2_0))
      dat_nd[i, ] <- temp[, c(3, 2, 4)]
      i <- i + 1
    }
    
    # linear model
    mod_nd1 <- lm(mu ~  x, 
                  data = dat_nd)
    
    ## model with sinh^-1 transformation
    mod_nd2 <- lm(mu ~  log(x + ((x^2 +1)^0.5)), 
                  data = dat_nd)
    
    ## model with quadratic term
    mod_nd3 <- lm(mu ~  I(x) + I(x^2), 
                  data = dat_nd)
    
    ## model with cubic term
    mod_nd4 <- lm(mu ~  I(x) + I(x^2) + I(x^3), 
                  data = dat_nd)
    
    ## model dummy variable regression
    mod_nd5 <- lm(mu ~  (x >= 5 & x < 10) + (x >= 10 & x < 15) + (x >= 15 & x < 20), 
                  data = dat_nd)
    
    dat_bic <- dat_bic %>% 
      mutate(bic_mod1 = ifelse(sig2_x == sig2_x0, bic_mod1 + BIC(mod_nd1), bic_mod1),
             bic_mod2 = ifelse(sig2_x == sig2_x0, bic_mod2 + BIC(mod_nd2), bic_mod2),
             bic_mod3 = ifelse(sig2_x == sig2_x0, bic_mod3 + BIC(mod_nd3), bic_mod3),
             bic_mod4 = ifelse(sig2_x == sig2_x0, bic_mod4 + BIC(mod_nd4), bic_mod4),
             bic_mod5 = ifelse(sig2_x == sig2_x0, bic_mod5 + BIC(mod_nd5), bic_mod5),
             aic_mod1 = ifelse(sig2_x == sig2_x0, aic_mod1 + AIC(mod_nd1), aic_mod1),
             aic_mod2 = ifelse(sig2_x == sig2_x0, aic_mod2 + AIC(mod_nd2), aic_mod2),
             aic_mod3 = ifelse(sig2_x == sig2_x0, aic_mod3 + AIC(mod_nd3), aic_mod3),
             aic_mod4 = ifelse(sig2_x == sig2_x0, aic_mod4 + AIC(mod_nd4), aic_mod4),
             aic_mod5 = ifelse(sig2_x == sig2_x0, aic_mod5 + AIC(mod_nd5), aic_mod5))
    k <- k + 1
  }
}
dat_bic <- dat_bic %>% 
  mutate(bic_mod1 = bic_mod1/simulations,
         bic_mod2 = bic_mod2/simulations,
         bic_mod3 = bic_mod3/simulations,
         bic_mod4 = bic_mod4/simulations,
         bic_mod5 = bic_mod5/simulations,
         aic_mod1 = aic_mod1/simulations,
         aic_mod2 = aic_mod2/simulations,
         aic_mod3 = aic_mod3/simulations,
         aic_mod4 = aic_mod4/simulations,
         aic_mod5 = aic_mod5/simulations)

dat_bic_nd <- dat_bic

dat_bic_nd <- dat_bic_nd %>%
  mutate(avgBIC = (bic_mod1 + bic_mod2 + bic_mod3 + bic_mod4 + bic_mod5)/5,
         avgAIC = (aic_mod1 + aic_mod2 + aic_mod3 + aic_mod4 + aic_mod5)/5)

dat_bic_nd <- dat_bic_nd %>%
  mutate(bic_mod1_avg = bic_mod1 - avgBIC,
         bic_mod2_avg = bic_mod2 - avgBIC,
         bic_mod3_avg = bic_mod3 - avgBIC,
         bic_mod4_avg = bic_mod4 - avgBIC,
         bic_mod5_avg = bic_mod5 - avgBIC,
         aic_mod1_avg = aic_mod1 - avgAIC,
         aic_mod2_avg = aic_mod2 - avgAIC,
         aic_mod3_avg = aic_mod3 - avgAIC,
         aic_mod4_avg = aic_mod4 - avgAIC,
         aic_mod5_avg = aic_mod5 - avgAIC)

dat_bic_nd_gg <- (reshape2::melt(reshape2::melt(dat_bic_nd, id.vars = "sig2_x"), id.vars = c("variable", "sig2_x")))
dat_bic_nd_gg <- dat_bic_nd_gg[, -3]

dat_aic_nd_gg <- dat_bic_nd_gg %>%
  filter(variable == "aic_mod1_avg" |
           variable == "aic_mod2_avg" |
           variable == "aic_mod3_avg" |
           variable == "aic_mod4_avg" |
           variable == "aic_mod5_avg")

dat_aic_nd_gg <- dat_aic_nd_gg %>%
  mutate(variable = recode(variable, "'aic_mod1_avg' = 'Linear';
                           'aic_mod2_avg' = 'ArcSinh(x)';
                           'aic_mod3_avg' = 'Quadratic';
                           'aic_mod4_avg' = 'Cubic';
                           'aic_mod5_avg' = 'Dummy'"))

dat_aic_nd_gg <- dat_aic_nd_gg %>%
  rename(model = variable)

dat_aic_nd_gg <- dat_aic_nd_gg %>%
  mutate(exp_eff = 10 * ((1/sig2_x) / ((1/25) + (1/sig2_x))) / sqrt(25))

dat_aic_nd_gg <- dat_aic_nd_gg %>% filter(model != "ArcSinh(x)")


dat_bic_nd_gg <- dat_bic_nd_gg %>%
  filter(variable == "bic_mod1_avg" |
           variable == "bic_mod2_avg" |
           variable == "bic_mod3_avg" |
           variable == "bic_mod4_avg" |
           variable == "bic_mod5_avg")


dat_bic_nd_gg <- dat_bic_nd_gg %>%
  mutate(variable = recode(variable, "'bic_mod1_avg' = 'Linear';
                           'bic_mod2_avg' = 'ArcSinh(x)';
                           'bic_mod3_avg' = 'Quadratic';
                           'bic_mod4_avg' = 'Cubic';
                           'bic_mod5_avg' = 'Dummy'"))


dat_bic_nd_gg <- dat_bic_nd_gg %>%
  rename(model = variable)


dat_bic_nd_gg <- dat_bic_nd_gg %>%
  mutate(exp_eff = 10 * ((1/sig2_x) / ((1/25) + (1/sig2_x))) / sqrt(25))

dat_bic_nd_gg <- dat_bic_nd_gg %>% filter(model != "ArcSinh(x)")



##################################################################
#################### PLOTTING MODEL SELECTION ####################
##################################################################

## reorder factors 

dat_bic_nd_gg <- dat_bic_nd_gg %>% 
  mutate(model = factor(model, levels = c("Linear", "Quadratic", "Cubic", "Dummy", "ArcSinh(x)")))
dat_aic_nd_gg <- dat_aic_nd_gg %>% 
  mutate(model = factor(model, levels = c("Linear", "Quadratic", "Cubic", "Dummy", "ArcSinh(x)")))

dat_bic_ld_gg <- dat_bic_ld_gg %>% 
  mutate(model = factor(model, levels = c("Linear", "Quadratic", "Cubic", "Dummy", "ArcSinh(x)")))
dat_aic_ld_gg <- dat_aic_ld_gg %>% 
  mutate(model = factor(model, levels = c("Linear", "Quadratic", "Cubic", "Dummy", "ArcSinh(x)")))

dat_bic_ed_gg <- dat_bic_ed_gg %>% 
  mutate(model = factor(model, levels = c("Linear", "Quadratic", "Cubic", "Dummy", "ArcSinh(x)")))
dat_aic_ed_gg <- dat_aic_ed_gg %>% 
  mutate(model = factor(model, levels = c("Linear", "Quadratic", "Cubic", "Dummy", "ArcSinh(x)")))


plot_nd_bic <- ggplot(dat_bic_nd_gg, aes(x = exp_eff, y = value)) +
  geom_point(aes(colour = model,
                 shape = model),
             size = 3,
             alpha = .8) +
  geom_line(aes(colour = model)) +
  theme_minimal() +
  coord_cartesian(ylim = c(-50, 50)) +
  scale_y_continuous(breaks = seq(from = -50, to = 50, by = 10)) +
  scale_x_continuous(limits = c(0.1, 0.7), breaks = seq(from = 0.1, to = 0.7, by = 0.1)) +
  ggtitle("No Discounting") +
  ylab("BIC") +
  xlab("Expected Effect / $\\sigma_0$") +
  labs(colour = "Model", shape = "Model") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_colour_discrete()

plot_ld_bic <- ggplot(dat_bic_ld_gg, aes(x = exp_eff, y = value)) +
  geom_point(aes(colour = model,
                 shape = model),
             size = 3,
             alpha = .8) +
  geom_line(aes(colour = model)) +
  theme_minimal() +
  scale_x_continuous(limits = c(0.1, 1), breaks = seq(from = 0.1, to = 1, by = 0.1)) +
  coord_cartesian(ylim = c(-50, 50)) +
  scale_y_continuous(breaks = seq(from = -50, to = 50, by = 10)) +
  ggtitle("Linear Discounting") +
  ylab("BIC") +
  xlab("Expected Effect / $\\sigma_0$") +
  labs(colour = "Model", shape = "Model") +
  theme(plot.title = element_text(hjust = 0.5))


plot_ed_bic <- ggplot(dat_bic_ed_gg, aes(x = exp_eff, y = value)) +
  geom_point(aes(colour = model,
                 shape = model),
             size = 3,
             alpha = .8) +
  geom_line(aes(colour = model)) +
  theme_minimal() +
  coord_cartesian(ylim = c(-175, 175)) +
  scale_y_continuous(breaks = seq(from = -175, to = 175, by = 25)) +
  scale_x_continuous(limits = c(0, 1.5), breaks = seq(from = 0, to = 1.5, by = 0.25)) +
  ggtitle("Exponential Discounting") +
  ylab("BIC") +
  xlab("Expected Effect / $\\sigma^2_0$") +
  labs(colour = "Model", shape = "Model") +
  theme(plot.title = element_text(hjust = 0.5))

plot_nd_aic <- ggplot(dat_aic_nd_gg, aes(x = exp_eff, y = value)) +
  geom_point(aes(colour = model,
                 shape = model),
             size = 3,
             alpha = .8) +
  geom_line(aes(colour = model)) +
  theme_minimal() +
  coord_cartesian(ylim = c(-50, 50)) +
  scale_y_continuous(breaks = seq(from = -50, to = 50, by = 10)) +
  scale_x_continuous(limits = c(0.1, 0.7), breaks = seq(from = 0.1, to = 0.7, by = 0.1)) +
  ggtitle("No Discounting") +
  ylab("AIC") +
  xlab("Expected Effect / $\\sigma_0$") +
  labs(colour = "Model", shape = "Model") +
  theme(plot.title = element_text(hjust = 0.5))

plot_ld_aic <- ggplot(dat_aic_ld_gg, aes(x = exp_eff, y = value)) +
  geom_point(aes(colour = model,
                 shape = model),
             size = 3,
             alpha = .8) +
  geom_line(aes(colour = model)) +
  theme_minimal() +
  scale_x_continuous(limits = c(0.1, 1), breaks = seq(from = 0.1, to = 1, by = 0.1)) +
  coord_cartesian(ylim = c(-50, 50)) +
  scale_y_continuous(breaks = seq(from = -50, to = 50, by = 10)) +
  ggtitle("Linear Discounting") +
  ylab("AIC") +
  xlab("Expected Effect / $\\sigma_0$") +
  labs(colour = "Model", shape = "Model") +
  theme(plot.title = element_text(hjust = 0.5))

plot_ed_aic <- ggplot(dat_aic_ed_gg, aes(x = exp_eff, y = value)) +
  geom_point(aes(colour = model,
                 shape = model),
             size = 3,
             alpha = .8) +
  geom_line(aes(colour = model)) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, 1.5), breaks = seq(from = 0, to = 1.5, by = 0.25)) +
  coord_cartesian(ylim = c(-175, 175)) +
  scale_y_continuous(breaks = seq(from = -175, to = 175, by = 25)) +
  ggtitle("Exponential Discounting") +
  ylab("AIC") +
  xlab("Expected Effect / $\\sigma_0$") +
  labs(colour = "Model", shape = "Model") +
  theme(plot.title = element_text(hjust = 0.5))

tikz(file = "../output/figures/fig_bic_comparisons.tex", width = 3.40, height = 11.6)
ggarrange(plot_nd_bic,
          plot_nd_aic,
          plot_ld_bic,
          plot_ld_aic,
          plot_ed_bic,
          plot_ed_aic,
          nrow = 3,
          ncol = 2, 
          legend="bottom")
dev.off()



